# library(BiocManager)
# BiocManager::install("DESeq2")
library(dplyr)

Attaching package: ‘dplyr’

The following object is masked from ‘package:Biobase’:

    combine

The following objects are masked from ‘package:BiocGenerics’:

    combine, intersect, setdiff, union

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(readr)
library(DESeq2)
Loading required package: S4Vectors
Loading required package: stats4

Attaching package: ‘S4Vectors’

The following objects are masked from ‘package:dplyr’:

    first, rename

The following objects are masked from ‘package:base’:

    expand.grid, I, unname

Loading required package: IRanges

Attaching package: ‘IRanges’

The following objects are masked from ‘package:dplyr’:

    collapse, desc, slice

Loading required package: GenomicRanges
Loading required package: GenomeInfoDb
Loading required package: SummarizedExperiment
Loading required package: MatrixGenerics
Loading required package: matrixStats

Attaching package: ‘matrixStats’

The following object is masked from ‘package:dplyr’:

    count

The following objects are masked from ‘package:Biobase’:

    anyMissing, rowMedians


Attaching package: ‘MatrixGenerics’

The following objects are masked from ‘package:matrixStats’:

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse, colCounts, colCummaxs, colCummins, colCumprods,
    colCumsums, colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs, colMads, colMaxs, colMeans2, colMedians,
    colMins, colOrderStats, colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds, colSums2, colTabulates,
    colVarDiffs, colVars, colWeightedMads, colWeightedMeans, colWeightedMedians, colWeightedSds, colWeightedVars,
    rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet, rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps, rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians,
    rowMins, rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks, rowSdDiffs, rowSds, rowSums2, rowTabulates,
    rowVarDiffs, rowVars, rowWeightedMads, rowWeightedMeans, rowWeightedMedians, rowWeightedSds, rowWeightedVars

The following object is masked from ‘package:Biobase’:

    rowMedians
library(readr)

gene_data <- read_csv('Breast_GSE45827.csv')
Rows: 151 Columns: 54677
── Column specification ─────────────────────────────────────────────
Delimiter: ","
chr     (1): type
dbl (54676): samples, 1007_s_at, 1053_at, 117_at, 121_at, 1255_g_...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(gene_data)
binary_class_data <- gene_data %>% 
  mutate(type = case_when(
    type == 'normal' ~ 'healthy',
    TRUE ~ 'cancer'
    )
  )

binary_class_data
binary_class_data <- as.data.frame(t(binary_class_data))
head(binary_class_data)
colnames(binary_class_data) <- binary_class_data[1,]
coldata <- as.data.frame(t(binary_class_data[c(2),]))
binary_class_data <- binary_class_data[-c(1,2),]

binary_class_data
coldata
converted_binary <- type.convert(binary_class_data)
converted_binary
has.neg <- apply(converted_binary, 1, function(row) any(row < 0))
which(has.neg)
dds <- DESeqDataSetFromMatrix(countData = converted_binary,
                              colData = coldata,
                              design = ~ type)
dds
library(affy)
gene_raw_data <- ReadAffy(celfile.path = "/Users/alex/Documents/GitHub/Breast-Cancer-Gene-Regulatory-Network-and-Risk-Prediction/Data/GSE45827_RAW/")
assay_data <- assayData(gene_raw_data)$exprs
pheno_data <- phenoData(gene_raw_data)
feature_data <- featureData(gene_raw_data)
arraysRMA = rma(gene_raw_data, normalize = FALSE)
log_arraysRMAtable=exprs(arraysRMA)
count_table = as.data.frame(round(2^log_arraysRMAtable))
head(count_table)
colnames(count_table) <- substr(colnames(count_table),8,10)
head(count_table)
count_table <- count_table[-c(3,6,8,10,13,16,19,24,28,31,34,36,42,48,50,52,54,57,62,71,76,82,90)]
count_table
colnames(count_table) <- as.integer(colnames(count_table))
count_table
coldata <- as.data.frame(as.integer(colnames(count_table)))

colnames(coldata) <- 'Sample'

coldata <- coldata %>% 
  mutate(type = case_when(
    Sample > 168 & Sample < 180 ~ 'non-cancer',
    TRUE ~ 'cancer'
    )
  )

coldata
dds <- DESeqDataSetFromMatrix(countData = count_table,
                              colData = coldata,
                              design = ~ type)
dds
# Pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
dds$type <- relevel(dds$type, ref = "non-cancer")
dds <- DESeq(dds)
res <- results(dds)
res
resultsNames(dds)
# Log Fold Change Shrink
resLFC <- lfcShrink(dds, coef="type_cancer_vs_non.cancer", type="apeglm")
resLFC
resOrdered <- res[order(res$pvalue),]
resOrdered
log2 fold change (MLE): type cancer vs non.cancer 
Wald test p-value: type cancer vs non.cancer 
DataFrame with 54675 rows and 6 columns
              baseMean log2FoldChange     lfcSE         stat       pvalue         padj
             <numeric>      <numeric> <numeric>    <numeric>    <numeric>    <numeric>
243689_s_at    25.9957       -3.89543  0.139157     -27.9930 1.97637e-172 1.08058e-167
224012_at      17.1237       -3.14268  0.129003     -24.3614 4.38935e-131 1.19994e-126
207092_at      85.7769       -5.70338  0.236180     -24.1484 7.75292e-129 1.41297e-124
211565_at      34.8702       -3.25637  0.136013     -23.9416 1.13155e-126 1.54669e-122
1552509_a_at   27.1727       -4.41723  0.185255     -23.8441 1.16619e-125 1.27523e-121
...                ...            ...       ...          ...          ...          ...
215244_at      22.8869    5.79663e-05 0.1330159  4.35785e-04     0.999652     0.999725
216200_at      14.0919   -4.95852e-05 0.1628031 -3.04572e-04     0.999757     0.999812
205735_s_at    18.0396    4.60511e-05 0.1960345  2.34913e-04     0.999813     0.999849
1553701_a_at   82.9793   -1.54787e-05 0.0804177 -1.92479e-04     0.999846     0.999865
1555629_at     22.8510   -1.39734e-05 0.1515850 -9.21817e-05     0.999926     0.999926
GDE = data.frame(resOrdered$log2FoldChange, resOrdered$pvalue)
rownames(GDE) = resOrdered@rownames
colnames(GDE) = c('Log2FoldChange','Ordered$pvalue')
GDE
write.csv(GDE, "GDE_Outcome.csv", row.names = TRUE)
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CiMgbGlicmFyeShCaW9jTWFuYWdlcikKIyBCaW9jTWFuYWdlcjo6aW5zdGFsbCgiREVTZXEyIikKbGlicmFyeShkcGx5cikKbGlicmFyeShyZWFkcikKbGlicmFyeShERVNlcTIpCmBgYAoKYGBge3J9CmdlbmVfZGF0YSA8LSByZWFkX2NzdignQnJlYXN0X0dTRTQ1ODI3LmNzdicpCmhlYWQoZ2VuZV9kYXRhKQpgYGAKCmBgYHtyfQpiaW5hcnlfY2xhc3NfZGF0YSA8LSBnZW5lX2RhdGEgJT4lIAogIG11dGF0ZSh0eXBlID0gY2FzZV93aGVuKAogICAgdHlwZSA9PSAnbm9ybWFsJyB+ICdoZWFsdGh5JywKICAgIFRSVUUgfiAnY2FuY2VyJwogICAgKQogICkKCmJpbmFyeV9jbGFzc19kYXRhCmBgYAoKYGBge3J9CmJpbmFyeV9jbGFzc19kYXRhIDwtIGFzLmRhdGEuZnJhbWUodChiaW5hcnlfY2xhc3NfZGF0YSkpCmhlYWQoYmluYXJ5X2NsYXNzX2RhdGEpCmBgYAoKYGBge3J9CmNvbG5hbWVzKGJpbmFyeV9jbGFzc19kYXRhKSA8LSBiaW5hcnlfY2xhc3NfZGF0YVsxLF0KY29sZGF0YSA8LSBhcy5kYXRhLmZyYW1lKHQoYmluYXJ5X2NsYXNzX2RhdGFbYygyKSxdKSkKYmluYXJ5X2NsYXNzX2RhdGEgPC0gYmluYXJ5X2NsYXNzX2RhdGFbLWMoMSwyKSxdCgpiaW5hcnlfY2xhc3NfZGF0YQpjb2xkYXRhCmBgYAoKYGBge3J9CmNvbnZlcnRlZF9iaW5hcnkgPC0gdHlwZS5jb252ZXJ0KGJpbmFyeV9jbGFzc19kYXRhKQpjb252ZXJ0ZWRfYmluYXJ5CmBgYAoKYGBge3J9Cmhhcy5uZWcgPC0gYXBwbHkoY29udmVydGVkX2JpbmFyeSwgMSwgZnVuY3Rpb24ocm93KSBhbnkocm93IDwgMCkpCndoaWNoKGhhcy5uZWcpCmBgYAoKYGBge3J9CmRkcyA8LSBERVNlcURhdGFTZXRGcm9tTWF0cml4KGNvdW50RGF0YSA9IGNvbnZlcnRlZF9iaW5hcnksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbERhdGEgPSBjb2xkYXRhLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkZXNpZ24gPSB+IHR5cGUpCmRkcwpgYGAKCmBgYHtyfQpsaWJyYXJ5KGFmZnkpCmdlbmVfcmF3X2RhdGEgPC0gUmVhZEFmZnkoY2VsZmlsZS5wYXRoID0gIi9Vc2Vycy9hbGV4L0RvY3VtZW50cy9HaXRIdWIvQnJlYXN0LUNhbmNlci1HZW5lLVJlZ3VsYXRvcnktTmV0d29yay1hbmQtUmlzay1QcmVkaWN0aW9uL0RhdGEvR1NFNDU4MjdfUkFXLyIpCmBgYAoKYGBge3J9CmFzc2F5X2RhdGEgPC0gYXNzYXlEYXRhKGdlbmVfcmF3X2RhdGEpJGV4cHJzCnBoZW5vX2RhdGEgPC0gcGhlbm9EYXRhKGdlbmVfcmF3X2RhdGEpCmZlYXR1cmVfZGF0YSA8LSBmZWF0dXJlRGF0YShnZW5lX3Jhd19kYXRhKQpgYGAKCmBgYHtyfQphcnJheXNSTUEgPSBybWEoZ2VuZV9yYXdfZGF0YSwgbm9ybWFsaXplID0gRkFMU0UpCmBgYAoKYGBge3J9CmxvZ19hcnJheXNSTUF0YWJsZT1leHBycyhhcnJheXNSTUEpCmNvdW50X3RhYmxlID0gYXMuZGF0YS5mcmFtZShyb3VuZCgyXmxvZ19hcnJheXNSTUF0YWJsZSkpCmhlYWQoY291bnRfdGFibGUpCmBgYAoKYGBge3J9CmNvbG5hbWVzKGNvdW50X3RhYmxlKSA8LSBzdWJzdHIoY29sbmFtZXMoY291bnRfdGFibGUpLDgsMTApCmhlYWQoY291bnRfdGFibGUpCmBgYAoKYGBge3J9CmNvdW50X3RhYmxlIDwtIGNvdW50X3RhYmxlWy1jKDMsNiw4LDEwLDEzLDE2LDE5LDI0LDI4LDMxLDM0LDM2LDQyLDQ4LDUwLDUyLDU0LDU3LDYyLDcxLDc2LDgyLDkwKV0KY291bnRfdGFibGUKYGBgCgpgYGB7cn0KY29sbmFtZXMoY291bnRfdGFibGUpIDwtIGFzLmludGVnZXIoY29sbmFtZXMoY291bnRfdGFibGUpKQpjb3VudF90YWJsZQpgYGAKCmBgYHtyfQpjb2xkYXRhIDwtIGFzLmRhdGEuZnJhbWUoYXMuaW50ZWdlcihjb2xuYW1lcyhjb3VudF90YWJsZSkpKQoKY29sbmFtZXMoY29sZGF0YSkgPC0gJ1NhbXBsZScKCmNvbGRhdGEgPC0gY29sZGF0YSAlPiUgCiAgbXV0YXRlKHR5cGUgPSBjYXNlX3doZW4oCiAgICBTYW1wbGUgPiAxNjggJiBTYW1wbGUgPCAxODAgfiAnbm9uLWNhbmNlcicsCiAgICBUUlVFIH4gJ2NhbmNlcicKICAgICkKICApCgpjb2xkYXRhCmBgYAoKYGBge3J9CmRkcyA8LSBERVNlcURhdGFTZXRGcm9tTWF0cml4KGNvdW50RGF0YSA9IGNvdW50X3RhYmxlLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb2xEYXRhID0gY29sZGF0YSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZGVzaWduID0gfiB0eXBlKQpkZHMKYGBgCgpgYGB7cn0KIyBQcmUtZmlsdGVyaW5nCmtlZXAgPC0gcm93U3Vtcyhjb3VudHMoZGRzKSkgPj0gMTAKZGRzIDwtIGRkc1trZWVwLF0KYGBgCgpgYGB7cn0KZGRzJHR5cGUgPC0gcmVsZXZlbChkZHMkdHlwZSwgcmVmID0gIm5vbi1jYW5jZXIiKQpgYGAKCmBgYHtyfQpkZHMgPC0gREVTZXEoZGRzKQpyZXMgPC0gcmVzdWx0cyhkZHMpCnJlcwpgYGAKCmBgYHtyfQpyZXN1bHRzTmFtZXMoZGRzKQpgYGAKCmBgYHtyfQojIExvZyBGb2xkIENoYW5nZSBTaHJpbmsKcmVzTEZDIDwtIGxmY1NocmluayhkZHMsIGNvZWY9InR5cGVfY2FuY2VyX3ZzX25vbi5jYW5jZXIiLCB0eXBlPSJhcGVnbG0iKQpyZXNMRkMKYGBgCgpgYGB7cn0KcmVzT3JkZXJlZCA8LSByZXNbb3JkZXIocmVzJHB2YWx1ZSksXQpyZXNPcmRlcmVkCmBgYAoKYGBge3J9CkdERSA9IGRhdGEuZnJhbWUocmVzT3JkZXJlZCRsb2cyRm9sZENoYW5nZSwgcmVzT3JkZXJlZCRwdmFsdWUpCnJvd25hbWVzKEdERSkgPSByZXNPcmRlcmVkQHJvd25hbWVzCmNvbG5hbWVzKEdERSkgPSBjKCdMb2cyRm9sZENoYW5nZScsJ09yZGVyZWQkcHZhbHVlJykKR0RFCmBgYAoKYGBge3J9CndyaXRlLmNzdihHREUsICJHREVfT3V0Y29tZS5jc3YiLCByb3cubmFtZXMgPSBUUlVFKQpgYGAKCgo=